function table_out_tot = extract_activity_data(protein_tot, file_name, tab_name)
% The purpose of this function is to transform the GTP activity data in
% xls(x) format into a standardized table format.
%
% Inputs:
% protein_tot = structure with 3 cell arrays of strings with the proteins
%               in the assay (divided by fields GTPase, LinEff and QuaEff,
%               for the GTPase, linear and quadratic effectors). Only the
%               first entry of GTPase is used.
% file_name   = cell array of strings of size 1 or of the same size as
%               tab_name denoting the file name with the data to read
%               (a single char vector is also accepted). When the sizes
%               differ, the first file is used for every tab.
% tab_name    = cell array of tab names where the data inside file_name is
%               to read (a single char vector is also accepted)
%
% Expected sheet layout: one header row containing the text 'Run' together
% with the headers 'Time', 'GTP_remaining', 'Error', 'Buffer_error',
% '<GTPase>_conc' and one '<effector>_conc' per effector. Every row below
% the header whose 'Run' cell holds non-empty text is read as a data row.
%
% The output is a table with the following variables / columns:
% Run (name) - Time ([h]) - GTP_remaining (normalized such that 100% = 1)
% - Error (of GTP_remaining) - Buffer_error
% then X1_conc, X2_conc, ... for every effector X (quadratic effectors
% first, then linear ones), e.g., Cdc24, and finally GTPase_conc ([uM]).
% The tables of all tabs are vertically concatenated.
%
% Errors:
% extract_activity_data:unsupportedFormat is thrown when a tab cannot be
% parsed (missing header row or column, non-numeric data, ...). The
% underlying exception is attached as its cause.
%
% Date: 31-10-2023

% Allow a single name to be given without wrapping it in a cell array.
if ischar(file_name)
    file_name           = {file_name};
end
if ischar(tab_name)
    tab_name            = {tab_name};
end

% Growing the table through an assignment to Run pads the other variables
% and raises this warning. Silence it only while this function runs and put
% the caller's warning state back afterwards (also when an error occurs).
warning_state           = warning('off', 'MATLAB:table:RowsAddedExistingVars');
restore_warning         = onCleanup(@() warning(warning_state)); %#ok<NASGU>

table_out               = cell(numel(tab_name), 1);
protein_str             = cat(2, protein_tot.QuaEff, protein_tot.LinEff);
effector_vars           = cellfun(@(name) [name '_conc'], protein_str, 'UniformOutput', false);
GTPase_var              = [protein_tot.GTPase{1} '_conc'];
% All numeric columns that have to be present in every tab.
numeric_vars            = [{'Time', 'GTP_remaining', 'Error', 'Buffer_error', GTPase_var}, ...
                           reshape(effector_vars, 1, [])];

for t = 1 : numel(tab_name)

    % One file per tab when the sizes match, otherwise always the first file
    if numel(file_name) == numel(tab_name)
        file            = t;
    else
        file            = 1;
    end

    [~, ~, table_in]    = xlsread(file_name{file}, tab_name{t});

    % Empty table with all output variables, so that a tab without data
    % rows still concatenates with the other tabs
    table_out{t}        = table(cell(0, 1), zeros(0, 1), zeros(0, 1), zeros(0, 1), zeros(0, 1), ...
                                'VariableNames', {'Run'; 'Time'; 'GTP_remaining'; 'Error'; 'Buffer_error'});
    for p = 1 : numel(effector_vars)
        table_out{t}.(effector_vars{p})     = zeros(0, 1);
    end
    table_out{t}.(GTPase_var)               = zeros(0, 1);

    try
        % Find the row of the headers
        row_header          = find(any(strcmp(table_in, 'Run'), 2), 1);
        if isempty(row_header)
            error('extract_activity_data:missingHeader', ...
                'No header row containing ''Run'' was found.');
        end
        header_cells        = table_in(row_header, :);

        % Find the columns where the different inputs are; a header that
        % is absent is reported instead of being passed on as an empty index
        col_run             = find(strcmp(header_cells, 'Run'), 1);
        col_numeric         = zeros(1, numel(numeric_vars));
        for v = 1 : numel(numeric_vars)
            col_found       = find(strcmp(header_cells, numeric_vars{v}), 1);
            if isempty(col_found)
                error('extract_activity_data:missingColumn', ...
                    'Column ''%s'' was not found in the header row.', numeric_vars{v});
            end
            col_numeric(v)  = col_found;
        end

        % Find all the rows with inputs (non-empty text in the Run column)
        is_data_row         = cellfun(@(x) ischar(x) && ~isempty(x), ...
                                      table_in(row_header + 1 : end, col_run));
        rows_list           = row_header + find(is_data_row);

        % Fill the output table with the data; nothing to do for a tab
        % that only contains the header
        if ~isempty(rows_list)
            table_out{t}.Run(1 : numel(rows_list), 1)   = table_in(rows_list, col_run);
            for v = 1 : numel(numeric_vars)
                table_out{t}.(numeric_vars{v})          = cell2mat(table_in(rows_list, col_numeric(v)));
            end
        end
    catch cause
        % Bracket concatenation keeps the space between the two halves of
        % the message (strcat strips trailing whitespace of char inputs)
        format_error        = MException('extract_activity_data:unsupportedFormat', '%s', ...
            ['Data table format not supported, please check in the Readme ', ...
             'how to format the data in the .xlsx file']);
        format_error        = addCause(format_error, cause);
        throw(format_error);
    end
end

table_out_tot       = vertcat(table_out{:});